***************************************
***		Do-File to impute wages		***
***		above censoring limit		***
***************************************
* Observations are aggregated to the establishment level in the establishment-weights section near the end of this file.
* The variable N gives the number of (person) observations per establishment-year;
* however, no results are ever output at this level of aggregation in what follows.

* start from an empty dataset and do not pause output for --more-- prompts
clear
set more off
* log this run; an existing log file of the same name is overwritten (the global macro log must be defined by the caller)
log using "$log/03_wimpute_persons.log", replace
* load the input dataset, replacing whatever is in memory (the global macro data must be defined by the caller)
use "$data/linked_1993-2008.dta", clear

*********************************
*** identify censoring limits ***
*********************************

* Candidate censored wages: keep only wages above 80, because the censoring
* limit is always > 80; everything else is set to missing.
generate highwages = cond(tentgelt > 80, tentgelt, .)

* Per year, the censoring limit is taken to be the most frequent of these high wages.
* To account for rounding errors, later steps use a threshold slightly (3 EUR) below it.
* NOTE(review): the original author asks to make sure this also works for East and
* West Berlin; the mode is computed by year only - to be confirmed.
bysort jahr: egen bbmg = mode(highwages)
drop highwages


******************************
*** impute censored values ***

* Working copy of the wage, capped at the censoring threshold (yearly mode minus 3 EUR).
* Stata treats a missing value as larger than any number, so every ">=" test below
* also requires a non-missing wage. Without that guard, observations with a missing
* wage would be capped at the threshold and flagged as censored.
* NOTE(review): observations with a missing wage now keep missing cwage, cens, lolim
* and uplim; confirm how they should be treated by the imputation step.
gen cwage = tentgelt
replace cwage = bbmg - 3 if cwage >= bbmg - 3 & !missing(cwage)

* indicator for censored obs (stays missing when the wage itself is missing)
gen cens=.
replace cens = 1 if cwage >= bbmg - 3 & !missing(cwage)
replace cens = 0 if cwage < bbmg - 3

* prepare for the mi impute intreg command: lolim and uplim are the lower and upper
* bounds of the log wage; censored observations only have a lower bound
gen lolim = ln(cwage)
gen uplim = ln(cwage)
replace uplim = . if cens==1

* intermediate file that the imputation loop reads cell by cell
save "$data/linked_imputed_1993-2008.dta", replace

*******************************************************************
***		imputation: separately by education group and industry	***
*******************************************************************
* For every combination of bildung (1-4) and w73_1 (1-7) the loop loads that cell,
* builds two establishment-year predictors, runs one interval-regression imputation
* of the log wage and saves the cell to its own file.
set more off

foreach bild in 1 2 3 4		{
	foreach ind of numlist 1/7		{

		* progress message for the log
		dis " "
		dis "Bildung:" `bild' ", Industry:" `ind'
		dis " "

		* open dataset: only the rows of the current education-industry cell
		use "$data/linked_imputed_1993-2008.dta" if bildung==`bild' & w73_1==`ind', clear
		svyset [pw=csweight]	/* this is the right weight */

		* generate special firm-level predictors: proportion censored and avg. wage - separately by edugroup
		* estsize counts the rows of this cell per establishment-year, not the whole establishment
		bysort estid jahr: gen estsize = _N
		bysort estid jahr: egen propcens = mean(cens)
		bysort estid jahr: egen avglnwage = mean(lolim)

		* one person establishments: their predictors would only reflect the person's own values,
		* so they are replaced with the weighted mean over rows with 2 to 29 persons per establishment-year
		qui svy: mean propcens if estsize !=1 & estsize < 30
		qui replace propcens = _b[propcens] if estsize == 1
		qui svy: mean avglnwage if estsize !=1 & estsize < 30
		qui replace avglnwage = _b[avglnwage] if estsize == 1
		drop estsize

		* one imputation (add(1)) with a fixed seed, bounded below by lolim and above by uplim
		mi set wide
		/* impute with force option because very few very small occupations cause perfect collinearity. obs in these occs get miss vals */
		mi impute intreg milnwage female foreigner exper exper2 exper3 c.exper#i.female c.exper2#i.female c.exper3#i.female nrftemployees propcens avglnwage i.ertragslage_thisy i.jahr i.beruf i.bula, ll(lolim) ul(uplim) add(1) rseed(37169) force

		* copy the imputed values into milnwage, leave mi format and drop what is no longer needed
		* (presumably the *_1_ variables and mi_miss are what mi unset leaves behind - confirm against the mi documentation)
		replace milnwage = _1_milnwage
		mi unset
		drop milnwage_1_ lolim_1_ uplim_1_ mi_miss lolim uplim
		* one temporary file per cell; the files are stacked after the loop
		save "$data/imp_`bild'_`ind'.dta", replace
		}
	}


*** Stack the per-cell imputation files into one dataset ***
* Start from an empty dataset, append each education-by-industry file in turn
* and delete the file as soon as it has been appended.
set more off
clear

forvalues bild = 1/4 {
	forvalues ind = 1/7 {
		local cellfile "$data/imp_`bild'_`ind'.dta"
		quietly append using "`cellfile'"
		erase "`cellfile'"
	}
}


*** top code at 3x censoring limit ***
* Cap the imputed log wage at the log of three times the censoring threshold.
* Stata treats a missing value as larger than any number, so the comparison must
* exclude missing milnwage explicitly. The imputation step runs with the force
* option and can leave observations without an imputed value; without the guard
* those observations would be set to the cap instead of staying missing.
replace milnwage = ln(3*(bbmg - 3)) if milnwage > ln(3*(bbmg - 3)) & !missing(milnwage)

* save
order estid jahr
sort estid jahr
compress
save "$data/linked_imputed_1993-2008.dta", replace
* NOTE: this deletes the original linked input file, so the do-file cannot be
* re-run from the top unless that file is recreated first
erase "$data/linked_1993-2008.dta"

*** convert wages into 2010 euros (i.e. into real wages) ***
* The path uses a forward slash like every other path in this file; Stata accepts
* it on all platforms, whereas a backslash only works on Windows.
use "$orig/deu_cpindex_91-13.dta", clear
rename year jahr
* attach the yearly price index to every person row, then drop the index years outside the sample period
merge 1:m jahr using "$data/linked_imputed_1993-2008.dta"
	drop if jahr > 2008 | jahr < 1993
* deflate the wage level and store it back as a log wage
* (presumably cpindex equals 100 in the base year 2010 - confirm against the index file)
gen mirealwage = exp(milnwage) * (100/cpindex)
replace milnwage = ln(mirealwage)

* rows with a missing real wage are not affected by this drop
drop if mirealwage < 20 /* drop persons who make less than EUR 600 per month: too little for a true full time employee */

drop _merge cpindex cpindex_change mirealwage
save "$data/linked_imputed_1993-2008.dta", replace

*** compute (time-constant) longitudinal weights for establishments and save firm-level data ***
* N = number of non-missing imputed log wages per establishment-year
* (results are never reported for individual establishments)
bysort estid jahr: egen N = count(milnwage)
* reduce the data to one row per establishment-year
bysort estid jahr: keep if _n==1
* drop the person id so that it is not overwritten later when merging
drop persnr

* generate time-constant establishment weights for FE-models
* weights must be constant within establishments: number the waves of each establishment
* in year order, take all values from the first wave and forward impute them later
bysort estid (jahr): gen wavecount = _n
gen consnrftempl = nrftemployees if wavecount==1

* time-constant establishment weights that place greater weight on large establishments
gen weight_size = (csweight * nrftemployees) if wavecount==1
label variable weight_size "longit. est. weight, large ests heavier"

* time-constant establishment weights that do not place greater weight on large establishments
gen weight_nosize = csweight if wavecount==1
label variable weight_nosize "longit. est. weight, large ests as small ests"

* forward impute these weights from first wave to all other waves (ignores changes in size over time)
* Every row takes the value of its establishment's first wave, but only when it has the
* same idnum as that first wave (sampling is based on idnum, not estid); otherwise the
* value stays missing. Within a by-group the subscript [1] refers to the group's first
* row, so one pass per variable replaces the former loop over a hard-coded maximum of
* 21 waves.
foreach wvar of varlist weight_size weight_nosize consnrftempl {
	qui bysort estid (jahr): replace `wvar' = `wvar'[1] if idnum == idnum[1]
	}

* establishment-level dataset used for estimation (idnum is not kept in it)
drop idnum
compress
save "$data/estsample_1993-2008.dta", replace

*** (time-constant) person-level weight for later FE models, where weights must be constant within establishments and over time
* weight_nosize is used because establishment size is automatically factored in by the
* number of employee rows per establishment; this is just shorthand for weight_size / nrftemployees
generate persweight = weight_nosize
label variable persweight "person weight"

* attach the weights to every person row of the establishment-year and overwrite the person file
keep estid jahr persweight weight_*
merge 1:m estid jahr using "$data/linked_imputed_1993-2008.dta", nogenerate
save "$data/linked_imputed_1993-2008.dta", replace
log close
*** END ***









